***********************************************
* Authors: Henrik Andersson & Sirus Dehdari
* This do-file generates Table B5 in the Appendix
*****************************************************

clear all
set more off

* 1. Use the workplace data that we took out; keep only the person id and
*    the basic worker info, then build 1000 random benchmark variables per
*    election year and save one file per year.

* randomtag (from SSC) randomly tags observations without dropping any.
* Install it only when it is missing, so reruns do not need network access.
capture which randomtag
if _rc {
    ssc install randomtag
}

* Election years and the number of observations to tag in each draw
* (same position in both lists).
* NOTE(review): the counts are hard-coded per year; presumably they were
* read off the summary of matchedcw printed below -- confirm.
local years  2006 2010 2014
local counts 3143052 3155773 2640578

forvalues i = 1/3 {
    local y : word `i' of `years'
    local n : word `i' of `counts'

    use "E:\ProjData\IntegrationSD\temp\workplace_`y'", clear
    sum matchedcw
    keep LopNr sh_im_cw_noi_swe
    drop if LopNr == .

    * 1000 random tags, each marking the requested number of observations
    forvalues r = 1/1000 {
        randomtag, count(`n') gen(rand_`r')
    }

    * Tagged observations (value 1) receive the observed share,
    * untagged observations (value 0) are set to missing
    foreach var of varlist rand_* {
        quietly replace `var' = . if `var' == 0
        quietly replace `var' = sh_im_cw_noi_swe if `var' == 1
    }

    * Keep only persons that come from the workplace file (sampl==1) and
    * have S==1 in the sample-of-individuals file.
    * The forward slash before the year is deliberate: a backslash placed
    * directly in front of a macro reference would block its expansion.
    gen sampl = 1
    merge 1:1 LopNr using "E:\ProjData\IntegrationSD\temp/`y'_sampleofind.dta"
    keep if sampl == 1 & S == 1
    drop _merge
    save "E:\ProjData\IntegrationSD\temp\random_bmark_workplace_`y'", replace
}








* 2. Now merge with the election-district ids and collapse to district means.
*    The header no longer carries a triple-slash marker: on a star comment
*    line that marker joins the following line into the comment, which kept
*    the clear command below from ever running.
clear
foreach y in 2006 2010 2014 {
    * First the random set of workplaces; the clear option makes the load
    * independent of whatever is currently in memory
    use "E:\ProjData\IntegrationSD\temp\random_bmark_workplace_`y'.dta", clear

    * Election-district data per person (original note: Valdeltagardata);
    * keep only persons present in both files
    merge 1:1 LopNr using "E:\ProjData\IntegrationSD\temp/`y'_electiondistrictsperperson.dta"
    keep if _merge==3
    drop _merge

    * Average every variable that is not stored as long, by election district.
    * NOTE(review): this relies on llkkdddd (and presumably LopNr) still being
    * of type long after compress, so that they stay out of the list -- confirm.
    compress
    ds, not(type long)
    local varlist `r(varlist)'
    display "`varlist'"
    collapse (mean) `varlist' , by(llkkdddd)

    save "E:\ProjData\IntegrationSD\temp\workplace_random`y'2.dta", replace
}

* 3. Having done this, the compute-share part must be redone.
*    Merge the district-level random benchmarks onto the mapping files that
*    link 2010 (and 2006) precincts to 2014 precincts.
*    Same steps for both years, so they run in one loop (2010 first, as before).
foreach y in 2010 2006 {
    * The clear option makes the load independent of the data left in memory
    use "E:\ProjData\IntegrationSD\temp\workplace_random`y'2.dta", clear

    * District code as zero-padded 8-character string, the merge key below
    gen str8 precinct`y' = string(llkkdddd,"%08.0f")
    drop llkkdddd

    * One district can appear in several rows of the mapping file;
    * unmatched rows from either side are kept
    merge 1:m precinct`y' using "E:\ProjData\IntegrationSD\temp\vd_`y'_2014_shares"
    drop _merge

    order precinct`y' whole_pop precinct2014 totbef share
    save "E:\ProjData\IntegrationSD\temp\vd_`y'_2014_shares_r_alt2", replace
}


* Now to one file
clear
cd "E:\"

* Path to the temp folder. It already ends with a separator, so file names
* are appended to it directly.
local out_temp "E:\ProjData\IntegrationSD\temp\"


* Original note to Henrik (translated from Swedish): specify here the name of
* the variable that comes last in your variable list. The variable pop comes
* first, so we store the name of the last one in a local.
* NOTE(review): the variable ranges below start at sh_im_cw_noi_swe, not pop.
local last_var "rand_1000"


*** Cleaning 2010 election data (changing precinct code and variable name):
use `out_temp'2010_electionresults, clear

* Adding "0" to districts where it has been dropped because of numeric storage:
tostring llkkdddd, replace
replace llkkdddd = "0" + llkkdddd if strlen(llkkdddd) < 8
rename llkkdddd precinct2010

* Merging election data with the precinct combos (no extra separator after
* the macro, consistent with the other uses of out_temp):
merge 1:m precinct2010 using `out_temp'vd_2010_2014_shares_r_alt2.dta

drop _merge

* Multiplying every variable from sh_im_cw_noi_swe through the last random
* benchmark by the combo share:
foreach var of varlist sh_im_cw_noi_swe-`last_var' {
    quietly replace `var' = share*`var'
}

* Aggregate to 2014 precincts.
* NOTE(review): collapse (sum) returns 0 for a group whose values are all
* missing, and unmatched rows were not dropped after the merge above --
* confirm that zeros for precincts without workplace data are intended.
collapse (sum) sh_im_cw_noi_swe-`last_var', by(precinct2014)

gen year = 2010


save `out_temp'reg_2010_2014_alt2, replace



*************************
*** 2006: same aggregation to 2014 precincts, starting from the Excel file
clear
cd "E:\"

* Temp folder (with trailing separator) and the last variable of the range
local out_temp "E:\ProjData\IntegrationSD\temp\"
local last_var "rand_1000"

* 2006 precinct-level election results; LKFV is renamed to the merge key
import excel "D:\Data\ExtData\ValResData\Riksdagsval_2006\Riksdagsval_2006.xls", ///
    sheet("riksdagsvalet_vd_2006_orginal") firstrow clear
rename LKFV precinct2006

* Only matched precincts are kept here.
* NOTE(review): the 2010 counterpart keeps unmatched rows as well -- confirm
* that the difference is intended.
merge 1:m precinct2006 using `out_temp'vd_2006_2014_shares_r_alt2, keep(match)
drop _merge

* Weight every variable in the range by the combo share
foreach v of varlist sh_im_cw_noi_swe-`last_var' {
    quietly replace `v' = share*`v'
}

* Sum the weighted values within each 2014 precinct
collapse (sum) sh_im_cw_noi_swe-`last_var', by(precinct2014)
gen year = 2006

save `out_temp'reg_2006_2014_alt2, replace


* Create panel
* Put the 2014, 2010 and 2006 files together and prepare the panel

clear all
use "E:\ProjData\IntegrationSD\temp\workplace_random20142.dta"
gen year = 2014
append using "E:\ProjData\IntegrationSD\temp\reg_2010_2014_alt2.dta"

* The 2014 rows carry the numeric district code; write it into precinct2014
* as a zero-padded 8-character string
gen str8 code14 = string(llkkdddd,"%08.0f")
replace precinct2014 = code14 if year == 2014
drop llkkdddd code14

append using "E:\ProjData\IntegrationSD\temp\reg_2006_2014_alt2.dta"
replace year = 2006 if year == .

* Time variable: 1, 2, 3 for 2006, 2010, 2014 (missing for any other year)
gen t = cond(year == 2006, 1, cond(year == 2010, 2, cond(year == 2014, 3, .)))

* Panel setting
order precinct2014 year t
destring precinct2014, replace
xtset precinct2014 t
sort precinct2014 t

*duplicates tag precinct2014, gen(tag)
*bysort year: tab tag
*5,044 districts in 2010 and 2014 both years
*4712 districts all years
*272 only 2010
*709 only 2006
*659 only 2014

* Add the main analysis data and store the combined file
merge 1:1 precinct2014 year using "E:\ProjData\IntegrationSD\use\main.dta"
drop _merge
save "E:\ProjData\IntegrationSD\use\randomdata_main_alt2.dta", replace




* 5. Now the analysis
*    The header no longer carries a triple-slash marker: on a star comment
*    line that marker joins the following line into the comment, which kept
*    the use command below from ever running.
use "E:\ProjData\IntegrationSD\use\randomdata_main_alt2.dta", clear
sort precinct2014 t

* Generate some more variables

* The political measures (percent) are scaled differently in 2010 and 2014:
* bring the 2014 values onto the same scale
foreach y in Mproc Cproc FPproc KDproc Sproc Vproc MPproc SDproc FIproc PPproc {
    replace `y' = `y'*100 if year==2014
}

* Independent variables: the 1000 random benchmarks, in percent
forvalues r = 1/1000 {
    replace rand_`r' = rand_`r'*100
}


* Other variables
gen unem_proc_swed = (unemployed_swedish/swedish)*100
gen other_share = (other_im/pop)*100

gen low_educ_share = (low_educ/pop)*100
gen high_educ_share = (high_educ/pop)*100

gen pop2 = pop^2

gen ctz_other_share = (ctz_other/other_im)*100

gen logwage = ln(LoneInk)
* Share young
gen young_share = (young/pop)*100

* Worker data. The d. operator takes the change from the previous period
* within a precinct and relies on the xtset declaration stored with the data.
gen dother_high_educ = d.other_high_educ
gen other_high_educ_share = (other_high_educ/other_im)*100
gen other_low_educ_share = (other_low_educ/other_im)*100
gen dother_high_educ_share = d.other_high_educ_share

replace share_im_cworker_swe = share_im_cworker_swe*100
replace sh_im_cw_noi_swe = sh_im_cw_noi_swe*100

* Co-worker shares by region of origin
foreach x in latin mena asia africa rest nordic western {
    replace share_`x'_cworker_swe = share_`x'_cworker_swe*100
}

* Co-worker shares by type and year of arrival
foreach x in newim mediumnewim notnewim oldim highskother lowskother {
    replace share_`x'_cworker_swe = share_`x'_cworker_swe*100
}

* Sector
replace sector_share_im = sector_share_im*100

* Municipality: the first four characters of the zero-padded precinct code
gen str8 str_dstr = string(precinct2014,"%08.0f")
gen kom_str = substr(str_dstr, 1,4)
destring kom_str, replace
* Add on labor market regions
merge m:1 kom_str using "E:\ProjData\IntegrationSD\temp\lmr2014"
drop _merge
encode Kod, gen(lmr14)
drop Kod


***************
* ANALYSIS *
*************

*************************
* baseline table *
*************************


* Regular - no float environment for paper

* Standardised version of each random benchmark variable
forvalues y = 1/1000 {
    egen std_treat`y' = std(rand_`y')
}

set matsize 11000

* Result matrix with one row per random benchmark:
* column 1 estimate, 2 upper bound, 3 lower bound, 4 benchmark number.
* It has exactly as many rows as there are regressions, so svmat below
* never has to add observations to the data.
matrix A = J(1000,4,.)

* Run the fixed-effects regression once for each random benchmark and store
* the coefficient with its bounds (estimate plus/minus 1.96 standard errors)
forvalues y = 1/1000 {
    xtreg SDproc std_treat`y' pop pop2 logwage low_educ_share ALosDag ctz_other_share ///
        unem_proc_swed other_share other_low_educ_share wage_cw_wp share_male_cw_wp share_young_cw_wp i.year##i.lmr14, fe vce(cluster precinct2014)
    matrix A[`y',1] = _b[std_treat`y']
    matrix A[`y',2] = _b[std_treat`y'] + 1.96*_se[std_treat`y']
    matrix A[`y',3] = _b[std_treat`y'] - 1.96*_se[std_treat`y']
    matrix A[`y',4] = `y'
}

* Turn the matrix columns into variables A1-A4 and give them readable names
svmat A

rename A1 estimate2
rename A2 upper2
rename A3 lower2
rename A4 n

* Coefficient with its interval for every random benchmark
graph twoway rcap upper2 lower2 n, lstyle(ci) ||  scatter estimate2 n, mstyle(p1) legend(off) graphregion(color(white)) ytitle(Coefficient Size) xtitle("")
graph export "C:\Userdata\Shared\Output\IntegrationSD\random_me.pdf", replace


*eststo clear
*eststo: xtreg SDproc std_treat pop pop2 logwage low_educ_share ALosDag ctz_other_share ///
*unem_proc_swed other_share other_low_educ_share wage_cw_wp share_male_cw_wp share_young_cw_wp i.year##i.lmr14, fe vce(cluster precinct2014)
*eststo: xtreg SDproc sh_im_cw_noi_swe pop pop2 logwage low_educ_share ALosDag ctz_other_share ///
*unem_proc_swed other_share other_low_educ_share wage_cw_wp share_male_cw_wp share_young_cw_wp i.year##i.lmr14, fe vce(cluster precinct2014)

*esttab using "C:\Userdata\Shared\Output\IntegrationSD\regmeerror.tex", ///
*se noconstant compress label scalars(N) /// 
*keep(std_treat) fragment /// 
*nonotes mlabels(none) ///
*replace 

* Measurement-error exercise: summary of the stored estimates and their
* interval bounds, exported as a LaTeX table
summarize estimate2 upper2 lower2
estpost tabstat estimate2 upper2 lower2, statistics(mean sd min max n) columns(statistics)

esttab using "C:\Userdata\Shared\Output\IntegrationSD\sumstat_me.tex", ///
    replace nonumbers float noobs label ///
    cells("mean(fmt(2)) sd min max count(fmt(0))")

